#' ---
#' title: Reproduce Appendix Figure B2 (Manifesto Ideology Application, Continuous Score Prompt)
#' date: 2024-10-06
#' version: 1.0
#' ---

library(tidyverse)
library(promptr)
# Fix the random number generator seed so that any later random draws in this
# script are repeatable from run to run.
set.seed(42)

## 1. Read and tidy data ----------------------------------

# Load the sentence-level data.
sentences <- read_csv('application3-one-shot-gpt-3-policy.csv')

# Collapse to one row per manifesto: all sentences sharing a `manifestoid`
# are pasted together, separated by single spaces, into one `text` value.
manifestos <- summarise(
  group_by(sentences, manifestoid),
  text = paste(sentence_text, collapse = ' ')
)

## 2. Scale Economic and Social Ideology -----------------------------

# Switch between reusing stored results (TRUE) and re-querying the API (FALSE).
from_file <- TRUE

if (from_file) {
  # Reuse previously stored results instead of calling the API.
  # NOTE(review): presumably this restores `manifestos` together with the
  # `economic` and `social` response columns used below -- confirm.
  load('appendix-B-manifestos.RData')
} else {

  # System messages: one for the economic scale, one for the social scale.
  economic_instructions <- "You will be provided with a text from a party manifesto. Where does this text stand on the ‘left’ to ‘right’ wing scale, in terms of economic policy? Provide your response as a score between 0 and 100 where 0 means ‘Extremely left’ and 100 means ‘Extremely right’. If the text does not refer to economic policy, return \"NA\". Respond *only* with your score."

  social_instructions <- "You will be provided with a text from a party manifesto. Where does this text stand on the ‘liberal’ to ‘conservative’ scale, in terms of social policy? Provide your response as a score between 0 and 100 where 0 means ‘Extremely liberal’ and 100 means ‘Extremely conservative’. If the text does not refer to social policy, return \"NA\". Respond *only* with your score."

  # Build one chat prompt per manifesto text for each scale.
  manifestos$economic_prompt <- lapply(
    manifestos$text,
    function(txt) format_chat(txt, system_message = economic_instructions)
  )

  manifestos$social_prompt <- lapply(
    manifestos$text,
    function(txt) format_chat(txt, system_message = social_instructions)
  )

  # Query the API for each scale, timing each call.
  system.time({
    manifestos$economic <- complete_chat(
      manifestos$economic_prompt,
      model = 'gpt-4-turbo-preview'
    )
  })
  # 1 minute

  system.time({
    manifestos$social <- complete_chat(
      manifestos$social_prompt,
      model = 'gpt-4-turbo-preview'
    )
  })
  # approximately 1 minute

  # Written under a different file name than the one loaded in the other
  # branch, so the stored results are not overwritten by a fresh run.
  save(manifestos, file = 'appendix-B-manifestos-updated.RData')
}
# cost approximately $8.61 in total (April 2024 API pricing)

## 3. Compare to expert-coded ideology -------------------------------

# Turn a list of per-manifesto response tables into one score per manifesto.
# Each element is expected to have `token` and `probability` columns. Rows
# whose token contains no digit are dropped, the remaining tokens are converted
# to numbers, and the score is their probability-weighted mean.
weighted_token_score <- function(responses) {
  per_response <- lapply(responses, function(resp) {
    # keep only numeric responses
    numeric_only <- filter(resp, str_detect(token, '[0-9]'))
    numeric_only <- mutate(numeric_only, token = as.numeric(token))
    # compute average score weighted by probability
    summarize(numeric_only, score = weighted.mean(token, probability))
  })
  unlist(per_response)
}

manifestos$gpt4_economic <- weighted_token_score(manifestos$economic)

manifestos$gpt4_social <- weighted_token_score(manifestos$social)

# Load the manifesto-level estimates to compare against.
# NOTE(review): presumably this file supplies the `expert_*` and `crowd_*`
# columns used in the plots and correlations below -- confirm.
benoit <- read_csv('application3-benoit-manifesto-estimates.csv')

# Attach those estimates to the GPT-4 scores, matching rows on `manifestoid`
# and keeping every row of `manifestos`.
manifestos <- left_join(manifestos, benoit, by = 'manifestoid')

# Figure B2: GPT-4 scores (x) against expert scores (y), one labelled point per
# manifesto, with the economic and social dimensions as side-by-side panels.
library(ggrepel)

# Left panel: economic dimension.
p_econ <- ggplot(data = manifestos,
                 mapping = aes(x = gpt4_economic,
                               y = expert_economic,
                               label = manifestoid)) +
  geom_text_repel() +
  labs(x = 'GPT-4 Economic Position',
       y = 'Expert Economic Position') +
  theme_bw()

# Right panel: social dimension. The x aesthetic must be the GPT-4 *social*
# score so that it matches the axis label and the expert social score on y;
# it previously plotted the economic score here.
p_social <- ggplot(data = manifestos,
                   mapping = aes(x = gpt4_social,
                                 y = expert_social,
                                 label = manifestoid)) +
  geom_text_repel() +
  labs(x = 'GPT-4 Social Position',
       y = 'Expert Social Position') +
  theme_bw()

library(patchwork)

# patchwork's `+` places the two panels next to each other
p <- p_econ + p_social

ggsave(plot = p,
       filename = 'figure-B2.png',
       width = 10, height = 5)

# Correlation with the expert economic scores: GPT-4 first, then the crowd
# scores for comparison.
summarise(manifestos,
          cor(gpt4_economic, expert_economic),
          cor(crowd_economic, expert_economic))

# The same comparison for the social dimension.
summarise(manifestos,
          cor(gpt4_social, expert_social),
          cor(crowd_social, expert_social))
